#clean up
#removes every object that ls() lists in the current workspace before anything else runs
#NOTE(review): this also deletes anything the caller had defined before running this script
rm(list=ls())

#load packages
library(readxl) #replaces XLConnect
library(foreign)
library(car)

#front matter: every relative file name used below is resolved against this folder
setwd("/Volumes/MONOGAN/psrsd/anxiety2016/data/cumulativeANES/")

###CODE THE 2008 OPEN-ENDED RESPONSES###
#read the first worksheet (its first row supplies the column names) and
#store the result as a plain data.frame
text <- as.data.frame(
  read_excel(path = "anes2008presOnly.xls", sheet = 1, col_names = TRUE)
)

##Democratic likes##
#counts the comma-delimited comments in candlik_likewhatdpc and stores the
#result in text$dem.like (0 when the response is missing)

#examine, recode the two non-response codes to missing, examine again
text$candlik_likewhatdpc[1:15]
rev(sort(table(text$candlik_likewhatdpc, useNA = "always")))[1:20]
text$candlik_likewhatdpc[text$candlik_likewhatdpc %in% c("-1 Inapplicable", "-7 Refused")] <- NA
rev(sort(table(text$candlik_likewhatdpc, useNA = "always")))[1:20]

#eliminate final statement of "no" further comment and other common problems
#(the pattern is case-insensitive and removes the letters wherever they occur,
#including inside longer words)
text$candlik_likewhatdpc <- gsub("no", "", text$candlik_likewhatdpc, ignore.case = TRUE)
head(text$candlik_likewhatdpc, 15)

#cut spaces, and turn various delimiters into comma delimiters
#period followed by a space (periods are used as delimiters in 2016)
text$candlik_likewhatdpc <- gsub("[.] ", ",", text$candlik_likewhatdpc)
head(text$candlik_likewhatdpc, 15)
#remaining spaces are deleted outright
text$candlik_likewhatdpc <- gsub(" ", "", text$candlik_likewhatdpc)
head(text$candlik_likewhatdpc, 15)
#double forward slash
text$candlik_likewhatdpc <- gsub("//", ",", text$candlik_likewhatdpc)
head(text$candlik_likewhatdpc, 15)
#backslash
text$candlik_likewhatdpc <- gsub("[\\]", ",", text$candlik_likewhatdpc)
head(text$candlik_likewhatdpc, 15)
#dash-slash-dash
text$candlik_likewhatdpc <- gsub("-/-", ",", text$candlik_likewhatdpc)
head(text$candlik_likewhatdpc, 15)
#semicolon
text$candlik_likewhatdpc <- gsub(";", ",", text$candlik_likewhatdpc)
head(text$candlik_likewhatdpc, 15)

#empty strings and the literal string "NA" (a 2016 coding) become missing
text$candlik_likewhatdpc[text$candlik_likewhatdpc %in% c("", "NA")] <- NA

#split the string
candlik_likewhatdpc.list <- strsplit(text$candlik_likewhatdpc, split = ",")
head(candlik_likewhatdpc.list, 15)

#count the number of comments, record in the data
#count: number of pieces per respondent (a missing response is one NA piece)
count <- as.numeric(vapply(candlik_likewhatdpc.list, length, integer(1)))
count[1:15]
length(count)
#empty: 1 when the first piece is NA, so that missing responses net out to 0
empty <- as.numeric(vapply(candlik_likewhatdpc.list,
                           function(pieces) is.na(pieces[[1]]),
                           logical(1)))
empty[1:15]
length(empty)
text$dem.like <- count - empty
head(text$candlik_likewhatdpc, 15)
head(text$dem.like, 15)
tail(text$candlik_likewhatdpc, 15)
tail(text$dem.like, 15)

##Democratic dislikes##
#counts the comma-delimited comments in candlik_dislwhatdpc and stores the
#result in text$dem.dislike (0 when the response is missing)

#examine, recode the two non-response codes to missing, examine again
text$candlik_dislwhatdpc[1:15]
rev(sort(table(text$candlik_dislwhatdpc, useNA = "always")))[1:20]
text$candlik_dislwhatdpc[text$candlik_dislwhatdpc %in% c("-1 Inapplicable", "-7 Refused")] <- NA
rev(sort(table(text$candlik_dislwhatdpc, useNA = "always")))[1:20]

#eliminate final statement of "no" further comment and other common problems
#(the pattern is case-insensitive and removes the letters wherever they occur,
#including inside longer words)
text$candlik_dislwhatdpc <- gsub("no", "", text$candlik_dislwhatdpc, ignore.case = TRUE)
head(text$candlik_dislwhatdpc, 15)

#cut spaces, and turn various delimiters into comma delimiters
#period followed by a space (periods are used as delimiters in 2016)
text$candlik_dislwhatdpc <- gsub("[.] ", ",", text$candlik_dislwhatdpc)
head(text$candlik_dislwhatdpc, 15)
#remaining spaces are deleted outright
text$candlik_dislwhatdpc <- gsub(" ", "", text$candlik_dislwhatdpc)
head(text$candlik_dislwhatdpc, 15)
#double forward slash
text$candlik_dislwhatdpc <- gsub("//", ",", text$candlik_dislwhatdpc)
head(text$candlik_dislwhatdpc, 15)
#backslash
text$candlik_dislwhatdpc <- gsub("[\\]", ",", text$candlik_dislwhatdpc)
head(text$candlik_dislwhatdpc, 15)
#dash-slash-dash
text$candlik_dislwhatdpc <- gsub("-/-", ",", text$candlik_dislwhatdpc)
head(text$candlik_dislwhatdpc, 15)
#semicolon
text$candlik_dislwhatdpc <- gsub(";", ",", text$candlik_dislwhatdpc)
head(text$candlik_dislwhatdpc, 15)

#check for empty strings, turn to missing
text$candlik_dislwhatdpc[text$candlik_dislwhatdpc %in% ""] <- NA
#the literal string "NA" is a 2016 coding for missing; the mask and the target
#must both be THIS column so that another column is never overwritten
text$candlik_dislwhatdpc[text$candlik_dislwhatdpc %in% "NA"] <- NA

#split the string
candlik_dislwhatdpc.list <- strsplit(text$candlik_dislwhatdpc, split = ",")
head(candlik_dislwhatdpc.list, 15)

#count the number of comments, record in the data
#count: number of pieces per respondent (a missing response is one NA piece)
count <- as.numeric(vapply(candlik_dislwhatdpc.list, length, integer(1)))
count[1:15]
length(count)
#empty: 1 when the first piece is NA, so that missing responses net out to 0
empty <- as.numeric(vapply(candlik_dislwhatdpc.list,
                           function(pieces) is.na(pieces[[1]]),
                           logical(1)))
empty[1:15]
length(empty)
text$dem.dislike <- count - empty
head(text$candlik_dislwhatdpc, 15)
head(text$dem.dislike, 15)
tail(text$candlik_dislwhatdpc, 15)
tail(text$dem.dislike, 15)

##Republican likes##
#counts the comma-delimited comments in candlik_likewhatrpc and stores the
#result in text$rep.like (0 when the response is missing)

#examine, recode the two non-response codes to missing, examine again
text$candlik_likewhatrpc[1:15]
rev(sort(table(text$candlik_likewhatrpc, useNA = "always")))[1:20]
text$candlik_likewhatrpc[text$candlik_likewhatrpc %in% c("-1 Inapplicable", "-7 Refused")] <- NA
rev(sort(table(text$candlik_likewhatrpc, useNA = "always")))[1:20]

#eliminate final statement of "no" further comment and other common problems
#(the pattern is case-insensitive and removes the letters wherever they occur,
#including inside longer words)
text$candlik_likewhatrpc <- gsub("no", "", text$candlik_likewhatrpc, ignore.case = TRUE)
head(text$candlik_likewhatrpc, 15)

#cut spaces, and turn various delimiters into comma delimiters
#period followed by a space (periods are used as delimiters in 2016)
text$candlik_likewhatrpc <- gsub("[.] ", ",", text$candlik_likewhatrpc)
head(text$candlik_likewhatrpc, 15)
#remaining spaces are deleted outright
text$candlik_likewhatrpc <- gsub(" ", "", text$candlik_likewhatrpc)
head(text$candlik_likewhatrpc, 15)
#double forward slash
text$candlik_likewhatrpc <- gsub("//", ",", text$candlik_likewhatrpc)
head(text$candlik_likewhatrpc, 15)
#backslash
text$candlik_likewhatrpc <- gsub("[\\]", ",", text$candlik_likewhatrpc)
head(text$candlik_likewhatrpc, 15)
#dash-slash-dash
text$candlik_likewhatrpc <- gsub("-/-", ",", text$candlik_likewhatrpc)
head(text$candlik_likewhatrpc, 15)
#semicolon
text$candlik_likewhatrpc <- gsub(";", ",", text$candlik_likewhatrpc)
head(text$candlik_likewhatrpc, 15)

#check for empty strings, turn to missing
text$candlik_likewhatrpc[text$candlik_likewhatrpc %in% ""] <- NA
#the literal string "NA" is a 2016 coding for missing; the mask and the target
#must both be THIS column so that another column is never overwritten
text$candlik_likewhatrpc[text$candlik_likewhatrpc %in% "NA"] <- NA

#split the string
candlik_likewhatrpc.list <- strsplit(text$candlik_likewhatrpc, split = ",")
head(candlik_likewhatrpc.list, 15)

#count the number of comments, record in the data
#count: number of pieces per respondent (a missing response is one NA piece)
count <- as.numeric(vapply(candlik_likewhatrpc.list, length, integer(1)))
count[1:15]
length(count)
#empty: 1 when the first piece is NA, so that missing responses net out to 0
empty <- as.numeric(vapply(candlik_likewhatrpc.list,
                           function(pieces) is.na(pieces[[1]]),
                           logical(1)))
empty[1:15]
length(empty)
text$rep.like <- count - empty
head(text$candlik_likewhatrpc, 15)
head(text$rep.like, 15)
tail(text$candlik_likewhatrpc, 15)
tail(text$rep.like, 15)

##Republican dislikes##
#counts the comma-delimited comments in candlik_dislwhatrpc and stores the
#result in text$rep.dislike (0 when the response is missing)

#examine, recode the two non-response codes to missing, examine again
text$candlik_dislwhatrpc[1:15]
rev(sort(table(text$candlik_dislwhatrpc, useNA = "always")))[1:20]
text$candlik_dislwhatrpc[text$candlik_dislwhatrpc %in% c("-1 Inapplicable", "-7 Refused")] <- NA
rev(sort(table(text$candlik_dislwhatrpc, useNA = "always")))[1:20]

#eliminate final statement of "no" further comment and other common problems
#(the pattern is case-insensitive and removes the letters wherever they occur,
#including inside longer words)
text$candlik_dislwhatrpc <- gsub("no", "", text$candlik_dislwhatrpc, ignore.case = TRUE)
head(text$candlik_dislwhatrpc, 15)

#cut spaces, and turn various delimiters into comma delimiters
#period followed by a space (periods are used as delimiters in 2016)
text$candlik_dislwhatrpc <- gsub("[.] ", ",", text$candlik_dislwhatrpc)
head(text$candlik_dislwhatrpc, 15)
#remaining spaces are deleted outright
text$candlik_dislwhatrpc <- gsub(" ", "", text$candlik_dislwhatrpc)
head(text$candlik_dislwhatrpc, 15)
#double forward slash
text$candlik_dislwhatrpc <- gsub("//", ",", text$candlik_dislwhatrpc)
head(text$candlik_dislwhatrpc, 15)
#backslash
text$candlik_dislwhatrpc <- gsub("[\\]", ",", text$candlik_dislwhatrpc)
head(text$candlik_dislwhatrpc, 15)
#dash-slash-dash
text$candlik_dislwhatrpc <- gsub("-/-", ",", text$candlik_dislwhatrpc)
head(text$candlik_dislwhatrpc, 15)
#semicolon
text$candlik_dislwhatrpc <- gsub(";", ",", text$candlik_dislwhatrpc)
head(text$candlik_dislwhatrpc, 15)

#check for empty strings, turn to missing
text$candlik_dislwhatrpc[text$candlik_dislwhatrpc %in% ""] <- NA
#the literal string "NA" is a 2016 coding for missing; the mask must be built
#from THIS column, not from another response column
text$candlik_dislwhatrpc[text$candlik_dislwhatrpc %in% "NA"] <- NA

#split the string
candlik_dislwhatrpc.list <- strsplit(text$candlik_dislwhatrpc, split = ",")
head(candlik_dislwhatrpc.list, 15)

#count the number of comments, record in the data
#count: number of pieces per respondent (a missing response is one NA piece)
count <- as.numeric(vapply(candlik_dislwhatrpc.list, length, integer(1)))
count[1:15]
length(count)
#empty: 1 when the first piece is NA, so that missing responses net out to 0
empty <- as.numeric(vapply(candlik_dislwhatrpc.list,
                           function(pieces) is.na(pieces[[1]]),
                           logical(1)))
empty[1:15]
length(empty)
text$rep.dislike <- count - empty
head(text$candlik_dislwhatrpc, 15)
head(text$rep.dislike, 15)
tail(text$candlik_dislwhatrpc, 15)
tail(text$rep.dislike, 15)

##Create the end count file##
#keep the respondent identifier (caseID) together with the four comment counts
text.count <- text[, c("caseID", "dem.like", "dem.dislike", "rep.like", "rep.dislike")]
#NOTE(review): the export below is commented out, yet textCount2008.csv is read
#back later in this script, so that file must already exist on disk
#write.csv(text.count,"textCount2008.csv",row.names=F)




###QUANTITATIVE RESPONSE MANAGEMENT FOR CUMULATIVE FILE###
#load data, leaving coded values as numbers rather than converting them to factors
anes.0 <- read.dta("anes_timeseries_cdf_stata12.dta", convert.factors = FALSE)

#subset and name: the two vectors below are parallel, so the n-th VCF variable
#receives the n-th working name
anes.1 <- anes.0[, c(
  "VCF0006", "VCF0004", "VCF0704a", "VCF0301",
  "VCF0358", "VCF0359", "VCF0360", "VCF0361",
  "VCF0370", "VCF0371", "VCF0372", "VCF0373",
  "VCF0401", "VCF0402", "VCF0405", "VCF0406",
  "VCF0839", "VCF9086", "VCF9094",
  "VCF0809", "VCF9087", "VCF9095",
  "VCF0843", "VCF9081", "VCF9089"
)]
names(anes.1) <- c(
  "caseid", "year", "postvote_presvtwho", "pid_x",
  "dAngry", "dAfraid", "dHope", "dProud",
  "rAngry", "rAfraid", "rHope", "rProud",
  "candlik_likedpc", "candlik_disldpc", "candlik_likerpc", "candlik_dislrpc",
  "spsrvpr_ssself", "spsrvpr_ssdpc", "spsrvpr_ssrpc",
  "guarpr_self", "guarpr_dpc", "guarpr_rpc",
  "defsppr_self", "defsppr_dpc", "defsppr_rpc"
)

#keep only the seven listed study years from 1984 through 2008
anes.2 <- anes.1[anes.1$year %in% c(1984, 1988, 1992, 1996, 2000, 2004, 2008), ]

###VOTE AND PARTY###
#a vote code of 0 is treated as missing; the remaining codes are shifted down by one
#NOTE(review): presumably this makes vote.rep 0/1 with 1 = Republican -- confirm against the codebook
anes.2$postvote_presvtwho[which(anes.2$postvote_presvtwho == 0)] <- NA
anes.2$vote.rep <- anes.2$postvote_presvtwho - 1

#party identification: codes 0 and 4 become missing, then the scale is shifted
#down by one and divided by six
anes.2$pid_x[anes.2$pid_x %in% c(0, 4)] <- NA
anes.2$pid_x <- (anes.2$pid_x - 1) / 6

#keep only respondents with both a vote and a party identification
anes <- anes.2[!is.na(anes.2$postvote_presvtwho) & !is.na(anes.2$pid_x), ]

###ISSUE SCALES###
#on each of the nine placement items, any value below 1 or above 7 is set to missing
anes <- local({
  scale.items <- c("spsrvpr_ssself", "spsrvpr_ssdpc", "spsrvpr_ssrpc",
                   "guarpr_self", "guarpr_dpc", "guarpr_rpc",
                   "defsppr_self", "defsppr_dpc", "defsppr_rpc")
  cleaned <- anes
  cleaned[scale.items] <- lapply(cleaned[scale.items], function(item) {
    item[item > 7 | item < 1] <- NA
    item
  })
  cleaned
})

#issue advantage measure
#for each of the three issues: distance from self to the dpc placement minus
#distance from self to the rpc placement; the three differences are summed
anes$issues <- with(
  anes,
  abs(spsrvpr_ssdpc - spsrvpr_ssself) - abs(spsrvpr_ssrpc - spsrvpr_ssself) +
    abs(defsppr_dpc - defsppr_self) - abs(defsppr_rpc - defsppr_self) +
    abs(guarpr_dpc - guarpr_self) - abs(guarpr_rpc - guarpr_self)
)
#rescale so the observed minimum maps to 0 and the observed maximum to 1
anes$issues <- (anes$issues - min(anes$issues, na.rm = TRUE)) /
  diff(range(anes$issues, na.rm = TRUE))

###EMOTIONS###
#turn each of the eight feeling items into an indicator: 1 stays 1, 2 becomes 0,
#8 and 9 become missing
#NOTE(review): the recode string has no "else" clause, so any other code is left as is -- confirm none occur
anes <- local({
  feeling.items <- c("dAngry", "dAfraid", "dHope", "dProud",
                     "rAngry", "rAfraid", "rHope", "rProud")
  recoded <- anes
  recoded[feeling.items] <- lapply(recoded[feeling.items], recode,
                                   recodes = "1=1;2=0;8=NA;9=NA")
  recoded
})
table(anes$dAngry, anes$year)

#"own" emotions: use the r* item when pid_x is above .5, otherwise the d* item
anes$ang.own <- with(anes, ifelse(pid_x > .5, rAngry, dAngry))
anes$hp.own <- with(anes, ifelse(pid_x > .5, rHope, dHope))
anes$afr.own <- with(anes, ifelse(pid_x > .5, rAfraid, dAfraid))
anes$prd.own <- with(anes, ifelse(pid_x > .5, rProud, dProud))

###PERSONAL QUALITIES###
#rpc likes plus dpc dislikes, minus rpc dislikes and dpc likes
anes$personal <- with(
  anes,
  candlik_likerpc + candlik_disldpc - candlik_dislrpc - candlik_likedpc
)
#rescale so the observed minimum maps to 0 and the observed maximum to 1
anes$personal <- (anes$personal - min(anes$personal, na.rm = TRUE)) /
  diff(range(anes$personal, na.rm = TRUE))

#plug in three years of open-ended responses
text.2008 <- read.csv(file = "textCount2008.csv")
text.2012 <- read.csv(file = "textCount2012.csv")
text.2016 <- read.csv(file = "textCount2016.csv")

#for each year: cap every count at 5, tag the year, build the personal-qualities
#measure (rep likes + dem dislikes - rep dislikes - dem likes), rescale it to
#run from 0 to 1, and keep only the identifier, the year and the measure

#--2008--
text.2008$rep.like <- pmin(text.2008$rep.like, 5)
text.2008$rep.dislike <- pmin(text.2008$rep.dislike, 5)
text.2008$dem.like <- pmin(text.2008$dem.like, 5)
text.2008$dem.dislike <- pmin(text.2008$dem.dislike, 5)
text.2008$year <- 2008
names(text.2008)[1] <- "caseid"
text.2008$personal.08 <- with(text.2008, rep.like + dem.dislike - rep.dislike - dem.like)
text.2008$personal.08 <- (text.2008$personal.08 - min(text.2008$personal.08, na.rm = TRUE)) /
  diff(range(text.2008$personal.08, na.rm = TRUE))
text.2008 <- subset(text.2008, select = c(caseid, year, personal.08))

#--2012--
#NOTE(review): the first column of the 2012 file is not renamed here; presumably
#it is already called "caseid", since that name is selected below -- confirm
text.2012$rep.like <- pmin(text.2012$rep.like, 5)
text.2012$rep.dislike <- pmin(text.2012$rep.dislike, 5)
text.2012$dem.like <- pmin(text.2012$dem.like, 5)
text.2012$dem.dislike <- pmin(text.2012$dem.dislike, 5)
text.2012$year <- 2012
text.2012$personal.12 <- with(text.2012, rep.like + dem.dislike - rep.dislike - dem.like)
text.2012$personal.12 <- (text.2012$personal.12 - min(text.2012$personal.12, na.rm = TRUE)) /
  diff(range(text.2012$personal.12, na.rm = TRUE))
text.2012 <- subset(text.2012, select = c(caseid, year, personal.12))

#--2016--
text.2016$rep.like <- pmin(text.2016$rep.like, 5)
text.2016$rep.dislike <- pmin(text.2016$rep.dislike, 5)
text.2016$dem.like <- pmin(text.2016$dem.like, 5)
text.2016$dem.dislike <- pmin(text.2016$dem.dislike, 5)
text.2016$year <- 2016
names(text.2016)[1] <- "caseid"
text.2016$personal.16 <- with(text.2016, rep.like + dem.dislike - rep.dislike - dem.like)
text.2016$personal.16 <- (text.2016$personal.16 - min(text.2016$personal.16, na.rm = TRUE)) /
  diff(range(text.2016$personal.16, na.rm = TRUE))
text.2016 <- subset(text.2016, select = c(caseid, year, personal.16))

#attach the open-ended measures by respondent and year, keeping every row of anes
#NOTE(review): the year filter earlier in this script stops at 2008, so the 2012
#merge matches no rows here; it only adds an all-missing personal.12 column
anes <- merge(anes, text.2008, by = c("caseid", "year"), all.x = TRUE, all.y = FALSE)
anes <- merge(anes, text.2012, by = c("caseid", "year"), all.x = TRUE, all.y = FALSE)

#where the existing measure is missing, fall back on the open-ended one for that year
anes$personal <- ifelse(is.na(anes$personal) & anes$year == 2008,
                        anes$personal.08, anes$personal)
anes$personal <- ifelse(is.na(anes$personal) & anes$year == 2012,
                        anes$personal.12, anes$personal)

#drop the helper columns and the raw items that have already been folded into derived measures
anes <- anes[, !(names(anes) %in% c(
  "personal.08", "personal.12", "postvote_presvtwho",
  "candlik_likedpc", "candlik_disldpc", "candlik_likerpc", "candlik_dislrpc",
  "spsrvpr_ssself", "spsrvpr_ssdpc", "spsrvpr_ssrpc",
  "guarpr_self", "guarpr_dpc", "guarpr_rpc",
  "defsppr_self", "defsppr_dpc", "defsppr_rpc",
  "dAngry", "dAfraid", "dHope", "dProud",
  "rAngry", "rAfraid", "rHope", "rProud"
)), drop = FALSE]

###LOAD 2012 DATA###
anes.2012 <- read.table(file = "anes12.txt", header = TRUE)

#recode emotions to binary: 1 when the stored value is above zero, 0 otherwise
anes.2012$ang.own <- as.numeric(anes.2012$ang.own > 0)
anes.2012$hp.own <- as.numeric(anes.2012$hp.own > 0)
anes.2012$afr.own <- as.numeric(anes.2012$afr.own > 0)
anes.2012$prd.own <- as.numeric(anes.2012$prd.own > 0)

#fix the personal qualities: take the measure built from the 2012 open-ended counts
anes.2012 <- merge(anes.2012, text.2012, by = "caseid", all.x = TRUE, all.y = FALSE)
anes.2012$personal <- anes.2012$personal.12

#fix the issue scale: take issues.3 from the separate 2012 issue file
time.issue.12 <- read.csv(file = "timeIssue12.csv", header = TRUE)
anes.2012 <- merge(anes.2012, time.issue.12, by = "caseid", all.x = TRUE, all.y = FALSE)
anes.2012$issues <- anes.2012$issues.3
anes.2012 <- anes.2012[, setdiff(names(anes.2012), c("personal.12", "issues.3")), drop = FALSE]

#reorder and merge: stamp the year, keep the ten shared columns, append to the cumulative data
anes.2012$year <- 2012
anes.2012 <- anes.2012[, c("caseid", "year", "pid_x", "vote.rep", "issues",
                           "ang.own", "hp.own", "afr.own", "prd.own", "personal")]
anes <- rbind(anes, anes.2012)

###LOAD 2016 DATA###
anes.2016 <- read.table(file = "anes16.txt", header = TRUE)

#recode emotions to binary: 1 when the stored value is above zero, 0 otherwise
anes.2016$ang.own <- as.numeric(anes.2016$ang.own > 0)
anes.2016$hp.own <- as.numeric(anes.2016$hp.own > 0)
anes.2016$afr.own <- as.numeric(anes.2016$afr.own > 0)
anes.2016$prd.own <- as.numeric(anes.2016$prd.own > 0)

#fix the personal qualities: take the measure built from the 2016 open-ended counts
anes.2016 <- merge(anes.2016, text.2016, by = "caseid", all.x = TRUE, all.y = FALSE)
anes.2016$personal <- anes.2016$personal.16

#fix the issue scale: take issues.3 from the separate 2016 issue file
time.issue.16 <- read.csv(file = "timeIssue16.csv", header = TRUE)
anes.2016 <- merge(anes.2016, time.issue.16, by = "caseid", all.x = TRUE, all.y = FALSE)
anes.2016$issues <- anes.2016$issues.3
anes.2016 <- subset(anes.2016, select = -c(personal.16, disg.own, issues.3))

#reorder and merge: stamp the year, keep the ten shared columns, append to the cumulative data
anes.2016$year <- 2016
anes.2016 <- anes.2016[, c("caseid", "year", "pid_x", "vote.rep", "issues",
                           "ang.own", "hp.own", "afr.own", "prd.own", "personal")]
anes <- rbind(anes, anes.2016)


###WRITE DATA###
#keep the ten analysis variables, drop every row with a missing value in any of
#them, and save the result without row names
anes <- na.omit(anes[, c("caseid", "year", "vote.rep", "pid_x", "issues",
                         "personal", "ang.own", "hp.own", "afr.own", "prd.own")])
write.table(anes, file = "anesCumulative.txt", row.names = FALSE)

#TEST.MODEL<-glm(vote.rep~pid_x+personal+issues+as.factor(year),family=binomial(link="logit"),data=anes); summary(TEST.MODEL)

